In [1]:
import pandas as pd
import numpy as np
import seaborn as sbn
import altair as alt
import scipy.stats as stats
import matplotlib.pyplot as plt
In [2]:
file = "/Users/anair/Downloads/cleaned_players.csv"
df = pd.read_csv(file)
In [3]:
df.columns
Out[3]:
Index(['first_name', 'second_name', 'goals_scored', 'assists', 'total_points',
       'minutes', 'goals_conceded', 'creativity', 'influence', 'threat',
       'bonus', 'bps', 'ict_index', 'clean_sheets', 'red_cards',
       'yellow_cards', 'selected_by_percent', 'now_cost', 'element_type'],
      dtype='object')
In [4]:
df.sample(20)
Out[4]:
first_name second_name goals_scored assists total_points minutes goals_conceded creativity influence threat bonus bps ict_index clean_sheets red_cards yellow_cards selected_by_percent now_cost element_type
287 Matt Turner 0 0 0 0 0 0.0 0.0 0.0 0 0 0.0 0 0 0 7.2 39 GK
723 Kevin Danso 0 0 16 842 18 34.4 242.0 72.0 0 104 34.9 1 0 0 0.0 44 DEF
123 Ethan Brierley 0 0 0 0 0 0.0 0.0 0.0 0 0 0.0 0 0 0 0.1 44 MID
250 Caleb Wiley 0 0 0 0 0 0.0 0.0 0.0 0 0 0.0 0 0 0 0.0 40 DEF
139 Myles Peart-Harris 0 0 0 0 0 0.0 0.0 0.0 0 0 0.0 0 0 0 0.0 45 MID
603 Willy Boly 0 0 6 146 1 0.4 51.0 17.0 0 26 6.8 0 0 1 0.1 42 DEF
587 John Ruddy 0 0 0 0 0 0.0 0.0 0.0 0 0 0.0 0 0 0 0.3 39 GK
466 James McConnell 0 0 0 0 0 0.0 0.0 0.0 0 0 0.0 0 0 0 0.1 43 MID
314 Iliman Ndiaye 9 0 114 2426 24 326.4 598.0 493.0 21 552 142.0 15 0 3 1.1 52 FWD
502 Rúben Gato Alves Dias 0 0 90 2269 21 173.8 478.2 150.0 7 410 80.2 11 0 4 5.5 55 DEF
344 Luke Harris 0 0 0 0 0 0.0 0.0 0.0 0 0 0.0 0 0 0 0.0 45 MID
558 Chido Obi-Martin 0 0 7 160 5 5.1 8.2 46.0 0 21 5.4 0 0 1 0.2 45 FWD
257 Mathis Amougou 0 0 1 7 0 0.9 1.0 0.0 0 2 0.2 0 0 0 0.0 45 MID
173 Ibrahim Osman 0 0 0 0 0 0.0 0.0 0.0 0 0 0.0 0 0 0 0.0 50 MID
742 Konstantinos Mavropanos 0 0 45 2035 39 74.9 497.0 160.0 4 234 73.2 2 1 4 0.1 43 DEF
12 Declan Rice 4 7 127 2823 29 959.6 733.0 399.0 17 627 208.8 11 1 5 4.2 63 MID
616 Omar Richards 0 0 0 0 0 0.0 0.0 0.0 0 0 0.0 0 0 0 0.0 45 DEF
251 Filip Jørgensen 0 0 18 540 9 0.0 157.6 0.0 0 59 15.9 1 0 0 0.4 42 GK
37 Leon Bailey 1 2 47 1132 19 235.8 146.4 325.0 2 174 70.3 2 0 3 0.8 62 MID
523 Bruno Borges Fernandes 8 12 174 3017 47 1407.7 1017.8 587.0 28 810 300.8 10 2 3 13.7 84 MID
In [5]:
df["Full_Name"] = df.apply(lambda row: row['first_name'] + ' ' + row['second_name'], axis = 1)
df["Full_Name"]
Out[5]:
0                       Fábio Ferreira Vieira
1                   Gabriel Fernando de Jesus
2                Gabriel dos Santos Magalhães
3                                 Kai Havertz
4                                   Karl Hein
                        ...                  
799                          Emmanuel Agbadou
800    Vítor Manuel de Oliveira Lopes Pereira
801                              Nasser Djiga
802                          Marshall Munetsi
803                               Mateus Mané
Name: Full_Name, Length: 804, dtype: object
In [6]:
df_topgoalscorers = df.loc[df["goals_scored"] > 15, "Full_Name"]
print(f" All top goalscorers :\n {df_topgoalscorers}")
 All top goalscorers :
 67      Ollie Watkins
135      Bryan Mbeumo
146       Yoane Wissa
464     Mohamed Salah
493    Erling Haaland
574    Alexander Isak
624        Chris Wood
Name: Full_Name, dtype: object
In [7]:
df.head(10)
Out[7]:
first_name second_name goals_scored assists total_points minutes goals_conceded creativity influence threat bonus bps ict_index clean_sheets red_cards yellow_cards selected_by_percent now_cost element_type Full_Name
0 Fábio Ferreira Vieira 0 0 0 0 0 0.0 0.0 0.0 0 0 0.0 0 0 0 0.0 54 MID Fábio Ferreira Vieira
1 Gabriel Fernando de Jesus 3 2 42 600 5 119.5 154.4 255.0 6 152 52.6 2 0 4 1.0 65 FWD Gabriel Fernando de Jesus
2 Gabriel dos Santos Magalhães 3 2 117 2363 22 208.8 584.6 287.0 9 459 108.2 10 0 4 12.2 61 DEF Gabriel dos Santos Magalhães
3 Kai Havertz 9 3 97 1872 20 269.0 467.6 711.0 14 343 144.8 7 0 5 6.2 77 FWD Kai Havertz
4 Karl Hein 0 0 0 0 0 0.0 0.0 0.0 0 0 0.0 0 0 0 0.0 40 GK Karl Hein
5 Jurriën Timber 1 3 114 2415 21 296.8 393.0 254.0 9 426 94.2 11 0 7 4.3 55 DEF Jurriën Timber
6 Jorge Luiz Frello Filho 0 0 20 701 7 88.8 44.8 8.0 0 112 14.2 2 0 5 0.2 46 MID Jorge Luiz Frello Filho
7 Jakub Kiwior 1 0 43 1117 15 91.3 267.2 32.0 2 178 39.2 3 0 1 3.8 49 DEF Jakub Kiwior
8 Gabriel Martinelli Silva 8 4 125 2284 25 587.8 559.8 912.0 6 422 206.1 10 0 1 3.1 65 MID Gabriel Martinelli Silva
9 Ethan Nwaneri 4 2 66 882 4 282.8 293.2 255.0 4 251 83.0 4 0 1 1.5 43 MID Ethan Nwaneri
In [8]:
df_topbonus= df.nlargest(10, 'bonus')
df_topbonus
Out[8]:
first_name second_name goals_scored assists total_points minutes goals_conceded creativity influence threat bonus bps ict_index clean_sheets red_cards yellow_cards selected_by_percent now_cost element_type Full_Name
464 Mohamed Salah 29 18 344 3374 40 1199.2 1577.0 1985.0 55 1133 476.0 15 0 1 66.3 136 MID Mohamed Salah
624 Chris Wood 20 3 200 2958 37 320.0 838.4 900.0 41 744 205.6 15 0 1 30.8 72 FWD Chris Wood
767 Matheus Santos Carneiro Da Cunha 15 7 178 2596 54 847.8 936.2 940.0 41 838 272.2 7 0 4 13.6 70 FWD Matheus Santos Carneiro Da Cunha
574 Alexander Isak 23 6 211 2758 41 573.1 1043.0 1320.0 36 848 292.7 12 0 1 49.4 94 FWD Alexander Isak
242 Cole Palmer 15 10 214 3193 41 1259.2 1068.2 1052.0 35 952 338.0 10 0 7 34.3 105 MID Cole Palmer
67 Ollie Watkins 16 8 186 2593 40 345.2 766.0 1148.0 32 631 223.8 10 0 2 29.0 92 FWD Ollie Watkins
146 Yoane Wissa 18 6 185 2921 45 400.0 849.4 1181.0 32 739 242.4 9 0 5 24.2 69 FWD Yoane Wissa
135 Bryan Mbeumo 20 9 236 3415 55 1107.5 1236.8 1060.0 29 948 340.8 9 0 3 46.7 83 MID Bryan Mbeumo
523 Bruno Borges Fernandes 8 12 174 3017 47 1407.7 1017.8 587.0 28 810 300.8 10 2 3 13.7 84 MID Bruno Borges Fernandes
277 Jean-Philippe Mateta 14 2 150 2642 37 405.4 671.6 800.0 27 576 187.3 9 0 2 8.3 75 FWD Jean-Philippe Mateta
In [9]:
df_highest_selected = df[df["selected_by_percent"] > 20]
df_highest_selected
Out[9]:
first_name second_name goals_scored assists total_points minutes goals_conceded creativity influence threat bonus bps ict_index clean_sheets red_cards yellow_cards selected_by_percent now_cost element_type Full_Name
11 David Raya Martin 0 0 142 3420 34 10.7 755.4 0.0 10 555 76.7 13 0 3 32.6 56 GK David Raya Martin
13 Bukayo Saka 6 11 127 1724 21 842.8 606.0 830.0 18 508 227.9 7 0 3 20.2 104 MID Bukayo Saka
14 William Saliba 2 0 130 3039 29 150.1 672.6 116.0 14 564 94.2 12 1 2 25.8 64 DEF William Saliba
63 Morgan Rogers 8 11 161 3115 40 722.7 730.6 689.0 15 534 214.2 10 0 10 25.8 58 MID Morgan Rogers
67 Ollie Watkins 16 8 186 2593 40 345.2 766.0 1148.0 32 631 223.8 10 0 2 29.0 92 FWD Ollie Watkins
135 Bryan Mbeumo 20 9 236 3415 55 1107.5 1236.8 1060.0 29 948 340.8 9 0 3 46.7 83 MID Bryan Mbeumo
146 Yoane Wissa 18 6 185 2921 45 400.0 849.4 1181.0 32 739 242.4 9 0 5 24.2 69 FWD Yoane Wissa
242 Cole Palmer 15 10 214 3193 41 1259.2 1068.2 1052.0 35 952 338.0 10 0 7 34.3 105 MID Cole Palmer
464 Mohamed Salah 29 18 344 3374 40 1199.2 1577.0 1985.0 55 1133 476.0 15 0 1 66.3 136 MID Mohamed Salah
474 Virgil van Dijk 3 1 143 3330 38 206.6 932.6 299.0 8 631 143.7 14 0 5 25.6 67 DEF Virgil van Dijk
492 Joško Gvardiol 5 0 153 3278 41 501.7 847.0 497.0 15 623 185.0 13 0 2 38.6 65 DEF Joško Gvardiol
493 Erling Haaland 22 3 181 2736 38 359.4 946.0 1511.0 26 795 281.8 10 0 2 24.0 149 FWD Erling Haaland
517 Pep Guardiola 0 0 113 0 0 0.0 0.0 0.0 0 0 0.0 0 0 0 29.2 15 AM Pep Guardiola
518 Omar Marmoush 7 1 73 1174 12 246.7 318.0 410.0 12 326 97.3 8 0 0 22.6 74 FWD Omar Marmoush
574 Alexander Isak 23 6 211 2758 41 573.1 1043.0 1320.0 36 848 292.7 12 0 1 49.4 94 FWD Alexander Isak
624 Chris Wood 20 3 200 2958 37 320.0 838.4 900.0 41 744 205.6 15 0 1 30.8 72 FWD Chris Wood
In [10]:
[
    method_name
    for method_name in dir(df.plot)
    if not method_name.startswith("_")
]
Out[10]:
['area',
 'bar',
 'barh',
 'box',
 'density',
 'hexbin',
 'hist',
 'kde',
 'line',
 'pie',
 'scatter']
In [11]:
df_2 = df[['total_points', 'minutes', 'Full_Name', 'threat', 'element_type']]
In [12]:
df.columns
Out[12]:
Index(['first_name', 'second_name', 'goals_scored', 'assists', 'total_points',
       'minutes', 'goals_conceded', 'creativity', 'influence', 'threat',
       'bonus', 'bps', 'ict_index', 'clean_sheets', 'red_cards',
       'yellow_cards', 'selected_by_percent', 'now_cost', 'element_type',
       'Full_Name'],
      dtype='object')
In [13]:
df_2['pscore'] = df_2['minutes']/df_2['total_points']
/var/folders/lz/5bcqk50d4p1_59b0v5spmshr0000gq/T/ipykernel_4016/2037718134.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2['pscore'] = df_2['minutes']/df_2['total_points']

fig, axs = plt.subplots(figsize=(12, 4)) Create an empty Matplotlib Figure and Axes¶

air_quality.plot.area(ax=axs) Use pandas to put the area plot on the prepared Figure/Axes¶

axs.set_ylabel("NO$_2$ concentration") Do any Matplotlib customization you like¶

fig.savefig("no2_concentrations.png") Save the Figure/Axes using the existing Matplotlib method.¶

plt.show() Display the plot¶

In [14]:
df_2
Out[14]:
total_points minutes Full_Name threat element_type pscore
0 0 0 Fábio Ferreira Vieira 0.0 MID NaN
1 42 600 Gabriel Fernando de Jesus 255.0 FWD 14.285714
2 117 2363 Gabriel dos Santos Magalhães 287.0 DEF 20.196581
3 97 1872 Kai Havertz 711.0 FWD 19.298969
4 0 0 Karl Hein 0.0 GK NaN
... ... ... ... ... ... ...
799 45 1410 Emmanuel Agbadou 67.0 DEF 31.333333
800 111 0 Vítor Manuel de Oliveira Lopes Pereira 0.0 AM 0.000000
801 4 65 Nasser Djiga 0.0 DEF 16.250000
802 50 1077 Marshall Munetsi 212.0 MID 21.540000
803 1 1 Mateus Mané 0.0 FWD 1.000000

804 rows × 6 columns

In [15]:
df_p = df_2[df_2['pscore'].notna()]
In [16]:
df_p
Out[16]:
total_points minutes Full_Name threat element_type pscore
1 42 600 Gabriel Fernando de Jesus 255.0 FWD 14.285714
2 117 2363 Gabriel dos Santos Magalhães 287.0 DEF 20.196581
3 97 1872 Kai Havertz 711.0 FWD 19.298969
5 114 2415 Jurriën Timber 254.0 DEF 21.184211
6 20 701 Jorge Luiz Frello Filho 8.0 MID 35.050000
... ... ... ... ... ... ...
799 45 1410 Emmanuel Agbadou 67.0 DEF 31.333333
800 111 0 Vítor Manuel de Oliveira Lopes Pereira 0.0 AM 0.000000
801 4 65 Nasser Djiga 0.0 DEF 16.250000
802 50 1077 Marshall Munetsi 212.0 MID 21.540000
803 1 1 Mateus Mané 0.0 FWD 1.000000

582 rows × 6 columns

In [17]:
sbn.scatterplot(data = df_p, x = 'minutes', y ='pscore', hue = 'element_type', size ='threat')
plt.title("Scatter Plot of minutes played and pscore for players")
plt.show()
No description has been provided for this image
In [18]:
plt.figure(figsize=(14,8))
df_p['element_type'] = df_p['element_type'].astype('category').cat.codes

plt.scatter(df_p['minutes'], df_p['total_points'], s = df_p['pscore'], c = df_p['element_type'], cmap = 'viridis', alpha =0.7)
plt.colorbar(label = 'Element Type')
plt.ylabel("Total points earned")
plt.xlabel("Minutes Played")
plt.title("Position Based Threat & Score")
plt.show()
/var/folders/lz/5bcqk50d4p1_59b0v5spmshr0000gq/T/ipykernel_4016/876716628.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_p['element_type'] = df_p['element_type'].astype('category').cat.codes
/Users/anair/.pyenv/versions/3.10.8/lib/python3.10/site-packages/matplotlib/collections.py:996: RuntimeWarning: invalid value encountered in sqrt
  scale = np.sqrt(self._sizes) * dpi / 72.0 * self._factor
No description has been provided for this image
In [19]:
fig, axs = plt.subplots(1,2, figsize =(16,6))

## First Plot
axs[0].scatter(df_p['minutes'], df_p['total_points'], s = df_p['pscore'], c = df_p['element_type'], cmap = 'viridis', alpha =0.7)
axs[0].set_ylabel("Total points earned")
axs[0].set_xlabel("Minutes Played")
axs[0].set_title("Position Based Threat & Score")
fig.colorbar(axs[0].collections[0], ax = axs[0], label ='element Type')

## Second Plot
df_top = df_p.sort_values(by='threat', ascending=False).head(20)
axs[1].barh(df_top['Full_Name'], df_top['threat'], color='skyblue')
axs[1].set_title("Players and threat score")
axs[1].set_xlabel("Top Players")
axs[1].set_label("Threat Score")
axs[1].invert_yaxis()

plt.tight_layout()
plt.show()
No description has been provided for this image

List comprehensions¶

Simplify traditional approach to one line¶

1.) squares = []¶

for number in range(11):¶

if number %2 == 0¶

squares.append(number**2)¶

squares = [number**2 for number in range(11) if number % 2 == 0]¶

Condition - For loop - if¶

In [20]:
df_p
Out[20]:
total_points minutes Full_Name threat element_type pscore
1 42 600 Gabriel Fernando de Jesus 255.0 2 14.285714
2 117 2363 Gabriel dos Santos Magalhães 287.0 1 20.196581
3 97 1872 Kai Havertz 711.0 2 19.298969
5 114 2415 Jurriën Timber 254.0 1 21.184211
6 20 701 Jorge Luiz Frello Filho 8.0 4 35.050000
... ... ... ... ... ... ...
799 45 1410 Emmanuel Agbadou 67.0 1 31.333333
800 111 0 Vítor Manuel de Oliveira Lopes Pereira 0.0 0 0.000000
801 4 65 Nasser Djiga 0.0 1 16.250000
802 50 1077 Marshall Munetsi 212.0 4 21.540000
803 1 1 Mateus Mané 0.0 2 1.000000

582 rows × 6 columns

In [21]:
df
Out[21]:
first_name second_name goals_scored assists total_points minutes goals_conceded creativity influence threat bonus bps ict_index clean_sheets red_cards yellow_cards selected_by_percent now_cost element_type Full_Name
0 Fábio Ferreira Vieira 0 0 0 0 0 0.0 0.0 0.0 0 0 0.0 0 0 0 0.0 54 MID Fábio Ferreira Vieira
1 Gabriel Fernando de Jesus 3 2 42 600 5 119.5 154.4 255.0 6 152 52.6 2 0 4 1.0 65 FWD Gabriel Fernando de Jesus
2 Gabriel dos Santos Magalhães 3 2 117 2363 22 208.8 584.6 287.0 9 459 108.2 10 0 4 12.2 61 DEF Gabriel dos Santos Magalhães
3 Kai Havertz 9 3 97 1872 20 269.0 467.6 711.0 14 343 144.8 7 0 5 6.2 77 FWD Kai Havertz
4 Karl Hein 0 0 0 0 0 0.0 0.0 0.0 0 0 0.0 0 0 0 0.0 40 GK Karl Hein
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
799 Emmanuel Agbadou 1 0 45 1410 22 57.3 395.2 67.0 1 223 52.0 4 0 3 0.4 40 DEF Emmanuel Agbadou
800 Vítor Manuel de Oliveira Lopes Pereira 0 0 111 0 0 0.0 0.0 0.0 0 0 0.0 0 0 0 2.3 8 AM Vítor Manuel de Oliveira Lopes Pereira
801 Nasser Djiga 0 0 4 65 2 2.1 5.6 0.0 0 11 0.8 0 0 0 0.0 45 DEF Nasser Djiga
802 Marshall Munetsi 2 2 50 1077 11 132.9 176.8 212.0 4 134 52.3 5 0 0 0.1 50 MID Marshall Munetsi
803 Mateus Mané 0 0 1 1 0 0.3 0.0 0.0 0 3 0.0 0 0 0 0.1 45 FWD Mateus Mané

804 rows × 20 columns

Top Performers by Position¶

In [22]:
df_p.sample(5)
Out[22]:
total_points minutes Full_Name threat element_type pscore
779 87 2975 João Victor Gomes da Silva 231.0 4 34.195402
457 127 1921 Cody Gakpo 686.0 2 15.125984
334 27 483 Reiss Nelson 195.0 4 17.888889
583 34 783 Sean Longstaff 76.0 4 23.029412
317 105 2922 James Tarkowski 217.0 1 27.828571
In [23]:
high_perf = df_p.groupby('element_type').apply(lambda x: x.nlargest(3, 'total_points'))[['Full_Name', 'total_points', 'minutes']]
high_perf
/var/folders/lz/5bcqk50d4p1_59b0v5spmshr0000gq/T/ipykernel_4016/724788543.py:1: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
  high_perf = df_p.groupby('element_type').apply(lambda x: x.nlargest(3, 'total_points'))[['Full_Name', 'total_points', 'minutes']]
Out[23]:
Full_Name total_points minutes
element_type
0 330 David Moyes 137 0
297 Oliver Glasner 128 0
208 Fabian Hürzeler 119 0
1 492 Joško Gvardiol 153 3278
448 Trent Alexander-Arnold 148 2362
627 Nikola Milenković 145 3330
2 574 Alexander Isak 211 2758
624 Chris Wood 200 2958
67 Ollie Watkins 186 2593
3 316 Jordan Pickford 158 3420
621 Matz Sels 150 3420
11 David Raya Martin 142 3420
4 464 Mohamed Salah 344 3374
135 Bryan Mbeumo 236 3415
242 Cole Palmer 214 3193

Efficiency Metric¶

In [24]:
df_p
Out[24]:
total_points minutes Full_Name threat element_type pscore
1 42 600 Gabriel Fernando de Jesus 255.0 2 14.285714
2 117 2363 Gabriel dos Santos Magalhães 287.0 1 20.196581
3 97 1872 Kai Havertz 711.0 2 19.298969
5 114 2415 Jurriën Timber 254.0 1 21.184211
6 20 701 Jorge Luiz Frello Filho 8.0 4 35.050000
... ... ... ... ... ... ...
799 45 1410 Emmanuel Agbadou 67.0 1 31.333333
800 111 0 Vítor Manuel de Oliveira Lopes Pereira 0.0 0 0.000000
801 4 65 Nasser Djiga 0.0 1 16.250000
802 50 1077 Marshall Munetsi 212.0 4 21.540000
803 1 1 Mateus Mané 0.0 2 1.000000

582 rows × 6 columns

In [25]:
eff = df_p.query('minutes>=750').nsmallest(10,'pscore')[['Full_Name', 'threat']]
eff
Out[25]:
Full_Name threat
464 Mohamed Salah 1985.0
349 Rodrigo Muniz Carvalho 508.0
574 Alexander Isak 1320.0
463 Luis Díaz 946.0
9 Ethan Nwaneri 255.0
566 Harvey Barnes 701.0
694 James Maddison 495.0
13 Bukayo Saka 830.0
453 Diogo Teixeira da Silva 537.0
67 Ollie Watkins 1148.0
In [26]:
df.sample(5)
Out[26]:
first_name second_name goals_scored assists total_points minutes goals_conceded creativity influence threat bonus bps ict_index clean_sheets red_cards yellow_cards selected_by_percent now_cost element_type Full_Name
38 Ross Barkley 3 1 44 567 10 128.1 215.4 206.0 5 166 54.8 1 0 3 0.2 52 MID Ross Barkley
361 Martial Godo 0 0 2 21 0 12.0 3.4 14.0 0 4 3.0 0 0 0 0.0 44 MID Martial Godo
461 Caoimhin Kelleher 0 0 45 900 12 0.0 224.2 0.0 1 156 22.4 4 0 0 1.2 39 GK Caoimhin Kelleher
356 Carlos Vinícius Alves Morais 0 0 3 13 1 0.0 4.0 19.0 0 14 2.3 0 0 0 0.3 50 FWD Carlos Vinícius Alves Morais
683 Bryan Gil Salvatierra 0 0 0 0 0 0.0 0.0 0.0 0 0 0.0 0 0 0 0.0 50 MID Bryan Gil Salvatierra
In [27]:
df['gc'] = df['goals_scored'] + df['assists']
df['gc_per_game'] = (df['gc'] / df['minutes']) * 90
df.query('minutes > 0').sort_values('gc_per_game', ascending=False).head(10)
Out[27]:
first_name second_name goals_scored assists total_points minutes goals_conceded creativity influence threat ... ict_index clean_sheets red_cards yellow_cards selected_by_percent now_cost element_type Full_Name gc gc_per_game
700 Dane Scarlett 0 1 6 31 1 11.0 24.4 17.0 ... 5.2 0 0 0 0.7 44 FWD Dane Scarlett 1 2.903226
296 Romain Esse 1 2 22 138 3 38.4 74.8 51.0 ... 16.4 0 0 0 0.1 47 MID Romain Esse 3 1.956522
278 Matheus França de Oliveira 1 0 9 52 0 1.2 48.4 66.0 ... 11.5 0 0 0 0.2 44 MID Matheus França de Oliveira 1 1.730769
737 Danny Ings 1 3 32 270 4 76.5 97.0 159.0 ... 33.4 0 0 0 1.2 48 FWD Danny Ings 4 1.333333
464 Mohamed Salah 29 18 344 3374 40 1199.2 1577.0 1985.0 ... 476.0 15 0 1 66.3 136 MID Mohamed Salah 47 1.253705
254 Tyrique George 1 1 17 177 0 72.7 67.4 32.0 ... 17.1 1 0 1 0.0 45 MID Tyrique George 2 1.016949
47 Jhon Durán 7 0 53 622 14 68.9 273.8 334.0 ... 67.5 2 1 3 1.6 57 FWD Jhon Durán 7 1.012862
574 Alexander Isak 23 6 211 2758 41 573.1 1043.0 1320.0 ... 292.7 12 0 1 49.4 94 FWD Alexander Isak 29 0.946338
358 Ryan Sessegnon 4 2 57 574 10 158.8 284.0 227.0 ... 67.1 1 0 1 2.6 42 DEF Ryan Sessegnon 6 0.940767
77 Donyell Malen 3 0 34 296 6 42.7 138.2 208.0 ... 38.5 0 0 0 0.2 53 MID Donyell Malen 3 0.912162

10 rows × 22 columns

In [28]:
no_rules = df.query('yellow_cards > 5 or red_cards > 1').sort_values('yellow_cards', ascending=False)[['Full_Name', 'yellow_cards','selected_by_percent', 'influence', 'gc_per_game']]
no_rules.head(15)
Out[28]:
Full_Name yellow_cards selected_by_percent influence gc_per_game
666 Flynn Downes 12 0.0 297.6 0.041880
347 Saša Lukić 12 0.0 335.6 0.076661
375 Liam Delap 12 3.3 513.0 0.487051
274 Will Hughes 11 0.0 335.2 0.171429
569 Dan Burn 11 12.6 736.6 0.054054
549 Manuel Ugarte 11 0.1 313.6 0.151941
219 Moisés Caicedo Corozo 11 1.0 627.6 0.134288
626 Ryan Yates 10 0.1 344.4 0.189374
107 Dean Huijsen 10 5.0 826.4 0.185797
576 Joelinton Cássio Apolinário de Lira 10 1.1 469.8 0.300752
281 Daniel Muñoz 10 19.3 816.6 0.278724
385 Sam Morsy 10 0.1 417.0 0.065478
741 Lucas Tolentino Coelho de Lima 10 0.9 433.2 0.189553
601 Elliot Anderson 10 0.3 638.8 0.264123
63 Morgan Rogers 10 25.8 730.6 0.548957

ICT Index Breakdown - influence, threat and creativity¶

In [29]:
# Ensure these columns are numeric
cols = ['influence', 'threat', 'creativity']
df[cols] = df[cols].apply(pd.to_numeric, errors='coerce')

# Now you can safely scale them
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
df[['norm_influence', 'norm_threat', 'norm_creativity']] = scaler.fit_transform(df[cols])

# Compute the custom ICT score
df['ict_custom'] = (
    0.5 * df['norm_influence'] +
    0.3 * df['norm_threat'] +
    0.2 * df['norm_creativity']
)

# View the top 10
print(df[['first_name', 'second_name', 'ict_custom']].sort_values('ict_custom', ascending=False).head(10))
    first_name               second_name  ict_custom
464    Mohamed                     Salah    0.970377
135      Bryan                    Mbeumo    0.709687
242       Cole                    Palmer    0.676575
574  Alexander                      Isak    0.611611
523      Bruno          Borges Fernandes    0.611417
493     Erling                   Haaland    0.579361
767    Matheus  Santos Carneiro Da Cunha    0.559347
730     Jarrod                     Bowen    0.554502
99     Antoine                   Semenyo    0.531014
146      Yoane                     Wissa    0.504628
In [30]:
cor_matrix = df.corr(numeric_only=True)
cor_matrix['total_points'].sort_values(ascending=False)
Out[30]:
total_points           1.000000
bps                    0.930018
ict_custom             0.924956
ict_index              0.911029
norm_influence         0.888418
influence              0.888418
clean_sheets           0.860499
minutes                0.855952
bonus                  0.836223
gc                     0.812560
norm_threat            0.789423
threat                 0.789423
creativity             0.758385
norm_creativity        0.758385
assists                0.744160
goals_conceded         0.741313
goals_scored           0.738368
selected_by_percent    0.681181
yellow_cards           0.566471
gc_per_game            0.444055
now_cost               0.434292
red_cards              0.195832
Name: total_points, dtype: float64

Popularity & Performance¶

In [31]:
high_perf = df['total_points'] > df['total_points'].quantile(0.6)
low_select = df['selected_by_percent'] < df['selected_by_percent'].quantile(0.4)
outliers = df[high_perf & low_select]
In [32]:
import plotly.express as px
import plotly.io as pio

pio.renderers.default = 'notebook'  # or 'browser', 'iframe', etc.

fig = px.scatter(
    outliers,
    x = 'selected_by_percent',
    y = 'total_points',
    hover_data=['Full_Name', 'element_type'],
    labels = {'selected_by_percent': '% Selected by Managers',
              'total_points': 'Total Fantasy Points'
             }
)
fig.update_traces(marker = dict(size=10, color='red'), textposition='top center')
fig.update_layout(height=500)

fig.show()
In [33]:
pip install plotly
Requirement already satisfied: plotly in ./.pyenv/versions/3.10.8/lib/python3.10/site-packages (6.2.0)
Requirement already satisfied: narwhals>=1.15.1 in ./.pyenv/versions/3.10.8/lib/python3.10/site-packages (from plotly) (1.48.1)
Requirement already satisfied: packaging in ./.pyenv/versions/3.10.8/lib/python3.10/site-packages (from plotly) (24.0)

[notice] A new release of pip is available: 24.3.1 -> 25.1.1
[notice] To update, run: pip install --upgrade pip
Note: you may need to restart the kernel to use updated packages.
In [34]:
print(outliers.shape)
(12, 26)
In [35]:
outliers
Out[35]:
first_name second_name goals_scored assists total_points minutes goals_conceded creativity influence threat ... selected_by_percent now_cost element_type Full_Name gc gc_per_game norm_influence norm_threat norm_creativity ict_custom
84 David Brooks 2 1 52 941 12 316.7 193.6 300.0 ... 0.0 49 MID David Brooks 3 0.286929 0.122765 0.151134 0.224977 0.151718
202 Matt O'Riley 2 3 50 929 20 322.5 211.4 219.0 ... 0.0 55 MID Matt O'Riley 5 0.484392 0.134052 0.110327 0.229097 0.145944
274 Will Hughes 0 4 67 2100 31 462.6 335.2 90.0 ... 0.0 49 MID Will Hughes 4 0.171429 0.212555 0.045340 0.328621 0.185604
305 James Garner 0 1 42 1590 19 248.2 264.8 93.0 ... 0.0 49 MID James Garner 1 0.056604 0.167914 0.046851 0.176316 0.133275
347 Saša Lukić 0 2 56 2348 37 455.1 335.6 119.0 ... 0.0 48 MID Saša Lukić 2 0.076661 0.212809 0.059950 0.323293 0.189048
362 Sander Berge 0 0 53 2221 35 285.7 314.4 51.0 ... 0.0 50 MID Sander Berge 0 0.000000 0.199366 0.025693 0.202955 0.147982
399 Jack Clarke 0 4 53 1161 31 341.7 230.8 151.0 ... 0.0 54 MID Jack Clarke 4 0.310078 0.146354 0.076071 0.242736 0.144545
609 Nicolás Domínguez 0 1 54 1954 27 247.2 410.2 237.0 ... 0.0 48 MID Nicolás Domínguez 1 0.046059 0.260114 0.119395 0.175606 0.200997
649 Kamaldeen Sulemana 1 2 53 1394 36 182.4 187.2 290.0 ... 0.0 50 MID Kamaldeen Sulemana 3 0.193687 0.118706 0.146096 0.129573 0.129097
666 Flynn Downes 1 0 48 2149 49 292.2 297.6 118.0 ... 0.0 47 MID Flynn Downes 1 0.041880 0.188713 0.059446 0.207573 0.153705
753 Carlos Soler 1 1 47 1394 25 295.8 189.0 186.0 ... 0.0 50 MID Carlos Soler 2 0.129125 0.119848 0.093703 0.210130 0.130061
794 André Trindade da Costa Neto 0 0 61 2472 46 285.5 383.2 30.0 ... 0.0 50 MID André Trindade da Costa Neto 0 0.000000 0.242993 0.015113 0.202813 0.166593

12 rows × 26 columns

In [ ]: